In [1]:
import numpy as np
import pandas as pd
import scanpy as sc
import scipy
import os
import scipy.io as sio
In [2]:
sc.settings.verbosity = 1 # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.settings.set_figure_params(dpi=100, fontsize=10, dpi_save=300, figsize=(4,4), format='png')
In [3]:
data_path='./9301-CT_Output/all-well/DGE_filtered/'
# The DGE_filtered folder contains the expression matrix, genes, and files 
adata4 = sc.read_mtx(data_path+'DGE.mtx')

# reading in gene and cell data
gene_data = pd.read_csv(data_path + 'all_genes.csv')
cell_meta = pd.read_csv(data_path + 'cell_metadata.csv')

# find genes with nan values and filter
gene_data = gene_data[gene_data.gene_name.notnull()]
notNa = gene_data.index
notNa = notNa.to_list()

# remove genes with nan values and assign gene names
adata4 = adata4[:,notNa]
adata4.var = gene_data
adata4.var.set_index('gene_name', inplace=True)
adata4.var.index.name = None
adata4.var_names_make_unique()

# add cell meta data to anndata object
adata4.obs = cell_meta
adata4.obs.set_index('bc_wells', inplace=True)
adata4.obs.index.name = None
adata4.obs_names_make_unique()

sc.pp.filter_cells(adata4, min_genes=300)
sc.pp.filter_genes(adata4, min_cells=5)

# Returns the dimensions of the expression matrix (cells, genes)
adata4.shape
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/anndata/_core/anndata.py:798: UserWarning: 
AnnData expects .var.index to contain strings, but got values like:
    [0, 1, 2, 3, 4]

    Inferred to be: integer

  value_idx = self._prep_dim_index(value.index, attr)
Out[3]:
(17201, 29390)
In [4]:
data_path='./10268-CT/Jason_252_12_WTm/DGE_filtered/'
# The DGE_filtered folder contains the expression matrix, genes, and files 
adata2 = sc.read_mtx(data_path+'DGE.mtx')

# reading in gene and cell data
gene_data = pd.read_csv(data_path + 'all_genes.csv')
cell_meta = pd.read_csv(data_path + 'cell_metadata.csv')

# find genes with nan values and filter
gene_data = gene_data[gene_data.gene_name.notnull()]
notNa = gene_data.index
notNa = notNa.to_list()

# remove genes with nan values and assign gene names
#adata2 = adata2[:,notNa]
adata2.var = gene_data
adata2.var.set_index('gene_name', inplace=True)
adata2.var.index.name = None
adata2.var_names_make_unique()

# add cell meta data to anndata object
adata2.obs = cell_meta
adata2.obs.set_index('bc_wells', inplace=True)
adata2.obs.index.name = None
adata2.obs_names_make_unique()

sc.pp.filter_cells(adata2, min_genes=300)
sc.pp.filter_genes(adata2, min_cells=1)

# Returns the dimensions of the expression matrix (cells, genes)
adata2.shape
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/anndata/_core/anndata.py:798: UserWarning: 
AnnData expects .var.index to contain strings, but got values like:
    [0, 1, 2, 3, 4]

    Inferred to be: integer

  value_idx = self._prep_dim_index(value.index, attr)
Out[4]:
(5420, 26265)
In [5]:
data_path='./10268-CT/Jason_252_34_YT2/DGE_filtered/'
# The DGE_filtered folder contains the expression matrix, genes, and files 
adata3 = sc.read_mtx(data_path+'DGE.mtx')

# reading in gene and cell data
gene_data = pd.read_csv(data_path + 'all_genes.csv')
cell_meta = pd.read_csv(data_path + 'cell_metadata.csv')

# find genes with nan values and filter
gene_data = gene_data[gene_data.gene_name.notnull()]
notNa = gene_data.index
notNa = notNa.to_list()

# remove genes with nan values and assign gene names
#adata3 = adata3[:,notNa]
adata3.var = gene_data
adata3.var.set_index('gene_name', inplace=True)
adata3.var.index.name = None
adata3.var_names_make_unique()

# add cell meta data to anndata object
adata3.obs = cell_meta
adata3.obs.set_index('bc_wells', inplace=True)
adata3.obs.index.name = None
adata3.obs_names_make_unique()

sc.pp.filter_cells(adata3, min_genes=300)
sc.pp.filter_genes(adata3, min_cells=5)

# Returns the dimensions of the expression matrix (cells, genes)
adata3.shape
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/anndata/_core/anndata.py:798: UserWarning: 
AnnData expects .var.index to contain strings, but got values like:
    [0, 1, 2, 3, 4]

    Inferred to be: integer

  value_idx = self._prep_dim_index(value.index, attr)
Out[5]:
(4534, 19364)
In [6]:
adata3
Out[6]:
AnnData object with n_obs × n_vars = 4534 × 19364
    obs: 'sample', 'species', 'gene_count', 'tscp_count', 'mread_count', 'bc1_well', 'bc2_well', 'bc3_well', 'bc1_wind', 'bc2_wind', 'bc3_wind', 'n_genes'
    var: 'gene_id', 'genome', 'n_cells'
In [7]:
sc.external.pp.scrublet(adata2)
sc.external.pp.scrublet(adata3)
sc.external.pp.scrublet(adata4)
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/preprocessing/_normalization.py:170: UserWarning: Received a view of an AnnData. Making a copy.
  view_to_actual(adata)
Automatically set threshold at doublet score = 0.55
Detected doublet rate = 0.1%
Estimated detectable doublet fraction = 0.3%
Overall doublet rate:
	Expected   = 5.0%
	Estimated  = 17.6%
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/preprocessing/_normalization.py:170: UserWarning: Received a view of an AnnData. Making a copy.
  view_to_actual(adata)
Automatically set threshold at doublet score = 0.44
Detected doublet rate = 0.0%
Estimated detectable doublet fraction = 0.5%
Overall doublet rate:
	Expected   = 5.0%
	Estimated  = 8.2%
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/preprocessing/_normalization.py:170: UserWarning: Received a view of an AnnData. Making a copy.
  view_to_actual(adata)
Automatically set threshold at doublet score = 0.68
Detected doublet rate = 0.0%
Estimated detectable doublet fraction = 0.3%
Overall doublet rate:
	Expected   = 5.0%
	Estimated  = 2.1%
In [8]:
adata2 = adata2[adata2.obs.doublet_score < 0.1, :]
adata3 = adata3[adata3.obs.doublet_score < 0.1, :]
adata4 = adata4[adata4.obs.doublet_score < 0.1, :]
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [9]:
adata5 = adata2.concatenate(adata3, batch_key='sample', batch_categories = ['252 1/2', '252 3/4'], join='outer')
adata5
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/anndata/_core/anndata.py:1785: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  [AnnData(sparse.csr_matrix(a.shape), obs=a.obs) for a in all_adatas],
Out[9]:
AnnData object with n_obs × n_vars = 7876 × 26304
    obs: 'sample', 'species', 'gene_count', 'tscp_count', 'mread_count', 'bc1_well', 'bc2_well', 'bc3_well', 'bc1_wind', 'bc2_wind', 'bc3_wind', 'n_genes', 'doublet_score', 'predicted_doublet'
    var: 'gene_id-252 1/2', 'genome-252 1/2', 'n_cells-252 1/2', 'gene_id-252 3/4', 'genome-252 3/4', 'n_cells-252 3/4'
In [10]:
adata = adata4.concatenate(adata5, batch_key='batch', batch_categories = ['1', '2'], join='outer')
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/anndata/_core/anndata.py:1785: FutureWarning: X.dtype being converted to np.float32 from float64. In the next version of anndata (0.9) conversion will not be automatic. Pass dtype explicitly to avoid this warning. Pass `AnnData(X, dtype=X.dtype, ...)` to get the future behavour.
  [AnnData(sparse.csr_matrix(a.shape), obs=a.obs) for a in all_adatas],
In [11]:
adata.var['mt'] = adata.var_names.str.startswith('mt-')
sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)
In [12]:
adata = adata[adata.obs.n_genes_by_counts < 5000,:]
adata = adata[adata.obs.total_counts < 20000,:]
adata = adata[adata.obs.pct_counts_mt < 15,:]
adata.shape 
Out[12]:
(23218, 30759)
In [13]:
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/preprocessing/_normalization.py:170: UserWarning: Received a view of an AnnData. Making a copy.
  view_to_actual(adata)
In [14]:
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.25)
sc.pl.highly_variable_genes(adata, save='') # scanpy generates the filename automatically

# Save raw expression values before variable gene subset
adata.raw = adata
WARNING: saving figure to file figures/filter_genes_dispersion.png
In [15]:
sc.pp.regress_out(adata, ['tscp_count', 'pct_counts_mt'], n_jobs=12)
sc.pp.scale(adata, max_value=10)
In [16]:
sc.tl.pca(adata, svd_solver='arpack')
sc.external.pp.harmony_integrate(adata, 'batch', max_iter_harmony=50)
sc.pp.neighbors(adata, use_rep='X_pca_harmony', n_neighbors=10, n_pcs=35, random_state=0)
sc.tl.umap(adata, random_state=0)
sc.tl.leiden(adata, resolution=0.6, random_state=0)
sc.pl.umap(adata, color=['leiden'], legend_fontsize=8)
2023-08-20 09:53:55,942 - harmonypy - INFO - Iteration 1 of 50
2023-08-20 09:53:58,945 - harmonypy - INFO - Iteration 2 of 50
2023-08-20 09:54:02,384 - harmonypy - INFO - Iteration 3 of 50
2023-08-20 09:54:05,328 - harmonypy - INFO - Iteration 4 of 50
2023-08-20 09:54:08,141 - harmonypy - INFO - Iteration 5 of 50
2023-08-20 09:54:11,323 - harmonypy - INFO - Iteration 6 of 50
2023-08-20 09:54:14,642 - harmonypy - INFO - Iteration 7 of 50
2023-08-20 09:54:17,673 - harmonypy - INFO - Iteration 8 of 50
2023-08-20 09:54:20,393 - harmonypy - INFO - Iteration 9 of 50
2023-08-20 09:54:23,190 - harmonypy - INFO - Converged after 9 iterations
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm
In [17]:
adata
Out[17]:
AnnData object with n_obs × n_vars = 23218 × 30759
    obs: 'sample', 'species', 'gene_count', 'tscp_count', 'tscp_count_50dup', 'read_count', 'bc1_well', 'bc2_well', 'bc3_well', 'bc1_wind', 'bc2_wind', 'bc3_wind', 'n_genes', 'doublet_score', 'predicted_doublet', 'mread_count', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden'
    var: 'gene_id-1', 'genome-1', 'n_cells-1', 'gene_id-252 1/2-2', 'genome-252 1/2-2', 'n_cells-252 1/2-2', 'gene_id-252 3/4-2', 'genome-252 3/4-2', 'n_cells-252 3/4-2', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_colors'
    obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
    varm: 'PCs'
    obsp: 'distances', 'connectivities'
In [18]:
sc.set_figure_params(dpi=100, figsize=(4,4))
sc.pl.umap(adata, color=['leiden'], title='')
In [19]:
sc.pl.umap(adata, color=['sample'], title='')
In [20]:
sc.pl.umap(adata, color=['batch'], groups = '1', title='')
sc.pl.umap(adata, color=['batch'], groups = '2', title='')
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: In a future version of pandas all arguments of Categorical.replace except for the argument 'value' will be keyword-only.
  values = values.replace(values.categories.difference(groups), np.nan)
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: Categorical.replace is deprecated and will be removed in a future version. Use Series.replace directly instead.
  values = values.replace(values.categories.difference(groups), np.nan)
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: In a future version of pandas all arguments of Categorical.replace except for the argument 'value' will be keyword-only.
  values = values.replace(values.categories.difference(groups), np.nan)
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: Categorical.replace is deprecated and will be removed in a future version. Use Series.replace directly instead.
  values = values.replace(values.categories.difference(groups), np.nan)
In [21]:
pd.crosstab(adata.obs['sample'], adata.obs['leiden'])
Out[21]:
leiden 0 1 2 3 4 5 6 7 8 9 ... 12 13 14 15 16 17 18 19 20 21
sample
35-2-34-3 0 0 0 0 0 0 0 0 0 1 ... 0 0 0 0 0 0 0 0 0 0
252 1/2 717 1486 20 626 363 85 156 186 10 11 ... 11 5 2 24 58 1 44 98 64 29
252 3/4 615 1173 27 860 215 79 127 62 16 0 ... 6 9 5 21 63 1 25 36 59 9
252-7 3 65 91 170 31 44 0 1 41 11 ... 0 1 0 18 2 4 0 0 0 2
252-8 0 16 17 12 1 0 0 0 6 3 ... 0 0 1 1 1 5 0 0 0 1
252-9 5 272 124 152 141 314 2 0 92 6 ... 3 6 0 13 5 1 0 0 5 6
263-4 0 23 62 6 2 0 0 0 18 63 ... 1 1 1 1 1 8 0 0 0 1
263-6 335 108 121 117 186 119 209 278 97 5 ... 30 52 13 30 23 16 17 3 9 14
263-7 266 106 338 15 152 71 168 127 116 1 ... 60 189 52 19 18 45 31 11 4 17
264-3 770 82 318 19 215 145 130 101 118 30 ... 118 20 91 26 14 38 26 18 4 14
273-7 632 50 114 8 75 154 32 26 111 79 ... 115 78 25 6 14 11 15 3 3 6
273-8 236 65 11 25 77 103 36 79 28 2 ... 24 49 3 7 6 2 11 2 3 4
307-1-261-2 0 1 2 4 0 0 2 0 0 56 ... 0 0 0 0 0 2 0 0 0 0
307-2-286-1 3 19 180 14 6 0 13 0 12 394 ... 2 0 0 4 1 0 0 0 6 0
2704- 21 0 335 0 5 1 11 9 23 6 ... 3 1 18 2 3 29 1 1 2 0
2705- 9 8 401 1 4 5 8 9 56 6 ... 2 1 92 1 3 44 2 0 0 1
2706- 234 8 299 4 8 5 4 12 27 4 ... 45 3 42 0 2 6 23 0 2 1
3083-34-4 0 0 1 0 0 0 0 0 0 13 ... 0 0 0 0 0 0 0 0 0 0
C57-1 177 76 28 29 146 46 51 40 42 13 ... 24 4 2 38 6 4 3 3 2 7
C57-2 20 138 7 87 70 57 17 12 11 8 ... 1 0 0 15 1 0 0 0 2 3
C57-15 2 35 22 13 9 5 0 0 12 9 ... 0 1 2 4 4 1 0 0 0 1
C57-25 2 148 32 127 38 19 1 2 48 28 ... 0 1 1 34 3 6 0 1 0 2

22 rows × 22 columns

In [22]:
adata = adata[adata.obs['sample'].isin([
'C57-1',
'C57-15',
'C57-2',
'C57-25',
'263-6',
'263-7',
'2704-',
'252-9',
'263-4',
'264-3',
'273-7',
'273-8',
'2705-',
'2706-',
'252-7',
'252-8',
'252 1/2',
'252 3/4'
])]
adata
Out[22]:
View of AnnData object with n_obs × n_vars = 22444 × 30759
    obs: 'sample', 'species', 'gene_count', 'tscp_count', 'tscp_count_50dup', 'read_count', 'bc1_well', 'bc2_well', 'bc3_well', 'bc1_wind', 'bc2_wind', 'bc3_wind', 'n_genes', 'doublet_score', 'predicted_doublet', 'mread_count', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden'
    var: 'gene_id-1', 'genome-1', 'n_cells-1', 'gene_id-252 1/2-2', 'genome-252 1/2-2', 'n_cells-252 1/2-2', 'gene_id-252 3/4-2', 'genome-252 3/4-2', 'n_cells-252 3/4-2', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_colors', 'sample_colors', 'batch_colors'
    obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
    varm: 'PCs'
    obsp: 'distances', 'connectivities'
In [24]:
adata.obs.loc[adata.obs["sample"].isin(['C57-1',
'C57-15',
'C57-2',
'C57-25']), "genotype"] = "C57B6"
In [25]:
adata.obs.loc[adata.obs["sample"].isin(['263-6',
'263-7',
'2704-',
'252-9',
'263-4', '252 1/2']), "genotype"] = "WT"
In [26]:
adata.obs.loc[adata.obs["sample"].isin(['264-3',
'273-7',
'273-8',
'2705-',
'2706-',
'252-7',
'252-8', '252 3/4']), "genotype"] = "YT-deleted"
In [27]:
adata.obs.loc[adata.obs["sample"].isin(['C57-2',
'C57-25','252-9',
'263-4','252-7',
'252-8', '252 1/2', '252 3/4']), "treatment"] = "Saline"
In [28]:
adata.obs.loc[adata.obs["sample"].isin(['C57-1',
'C57-15','263-6',
'263-7',
'2704-','264-3',
'273-7',
'273-8',
'2705-',
'2706-']), "treatment"] = "Bleomycin"
In [29]:
adata.obs.loc[adata.obs["sample"].isin(['C57-2',
'C57-25',
'252-9',
'263-4', '252 1/2']), "group"] = "Control Veh"

adata.obs.loc[adata.obs["sample"].isin(['252-7',
'252-8', '252 3/4']), "group"] = "YT-deleted Veh"

adata.obs.loc[adata.obs["sample"].isin(['C57-1',
'C57-15','263-6',
'263-7',
'2704-']), "group"] = "Control bleo"

adata.obs.loc[adata.obs["sample"].isin(['264-3',
'273-7',
'273-8',
'2705-',
'2706-']), "group"] = "YT-deleted bleo"
In [30]:
#reembed and cluster subset
sc.tl.pca(adata, svd_solver='arpack')
sc.external.pp.harmony_integrate(adata, 'batch', max_iter_harmony=50)
sc.pp.neighbors(adata, use_rep='X_pca_harmony', n_neighbors=10, n_pcs=35, random_state=0)
sc.tl.umap(adata, random_state=0)
sc.tl.leiden(adata, resolution=0.6, random_state=0)
sc.pl.umap(adata, color=['leiden'], legend_fontsize=8)
2023-08-20 09:58:36,291 - harmonypy - INFO - Iteration 1 of 50
2023-08-20 09:58:39,170 - harmonypy - INFO - Iteration 2 of 50
2023-08-20 09:58:42,041 - harmonypy - INFO - Iteration 3 of 50
2023-08-20 09:58:46,017 - harmonypy - INFO - Iteration 4 of 50
2023-08-20 09:58:48,696 - harmonypy - INFO - Iteration 5 of 50
2023-08-20 09:58:51,456 - harmonypy - INFO - Iteration 6 of 50
2023-08-20 09:58:54,680 - harmonypy - INFO - Iteration 7 of 50
2023-08-20 09:58:57,618 - harmonypy - INFO - Iteration 8 of 50
2023-08-20 09:59:00,473 - harmonypy - INFO - Iteration 9 of 50
2023-08-20 09:59:03,431 - harmonypy - INFO - Converged after 9 iterations
In [31]:
adata
Out[31]:
AnnData object with n_obs × n_vars = 22444 × 30759
    obs: 'sample', 'species', 'gene_count', 'tscp_count', 'tscp_count_50dup', 'read_count', 'bc1_well', 'bc2_well', 'bc3_well', 'bc1_wind', 'bc2_wind', 'bc3_wind', 'n_genes', 'doublet_score', 'predicted_doublet', 'mread_count', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden', 'genotype', 'treatment', 'group'
    var: 'gene_id-1', 'genome-1', 'n_cells-1', 'gene_id-252 1/2-2', 'genome-252 1/2-2', 'n_cells-252 1/2-2', 'gene_id-252 3/4-2', 'genome-252 3/4-2', 'n_cells-252 3/4-2', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_colors', 'sample_colors', 'batch_colors'
    obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
    varm: 'PCs'
    obsp: 'distances', 'connectivities'
In [32]:
sc.set_figure_params(dpi=100, figsize=(4,4))
sc.pl.umap(adata, color=['leiden'], title='')
In [33]:
sc.pl.umap(adata, color=['group'], title='')
In [34]:
sc.pl.umap(adata, color=['group'], groups = 'Control Veh', title='Control PBS')
sc.pl.umap(adata, color=['group'], groups = 'YT-deleted Veh', title='YT-deleted PBS')
sc.pl.umap(adata, color=['group'], groups = 'Control bleo', title='Control Bleo')
sc.pl.umap(adata, color=['group'], groups = 'YT-deleted bleo', title='YT-deleted Bleo')
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: In a future version of pandas all arguments of Categorical.replace except for the argument 'value' will be keyword-only.
  values = values.replace(values.categories.difference(groups), np.nan)
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: Categorical.replace is deprecated and will be removed in a future version. Use Series.replace directly instead.
  values = values.replace(values.categories.difference(groups), np.nan)
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: In a future version of pandas all arguments of Categorical.replace except for the argument 'value' will be keyword-only.
  values = values.replace(values.categories.difference(groups), np.nan)
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: Categorical.replace is deprecated and will be removed in a future version. Use Series.replace directly instead.
  values = values.replace(values.categories.difference(groups), np.nan)
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: In a future version of pandas all arguments of Categorical.replace except for the argument 'value' will be keyword-only.
  values = values.replace(values.categories.difference(groups), np.nan)
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: Categorical.replace is deprecated and will be removed in a future version. Use Series.replace directly instead.
  values = values.replace(values.categories.difference(groups), np.nan)
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: In a future version of pandas all arguments of Categorical.replace except for the argument 'value' will be keyword-only.
  values = values.replace(values.categories.difference(groups), np.nan)
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: Categorical.replace is deprecated and will be removed in a future version. Use Series.replace directly instead.
  values = values.replace(values.categories.difference(groups), np.nan)
In [55]:
marker_genes = ['Ptprc', 'Col1a1', 'Pecam1', 'Epcam', 'Nkx2-1', "Sox2", 'Sox9', 'Trp63', 'Foxj1', 'Scgb1a1', 'Scgb3a1', 'Scgb3a2', 'Muc5b', 'Abca3', 'Sftpc', 'Sftpd', 'Lamp3', 'Krt8', 'Cdkn1a', 'Gdf15', 'Cldn4', 'Lgals3', 'Sox4', 'Hopx', 'Ager', 'Rtkn2', 'Vegfa', 'Wnt3a', 'Pdgfa', 'Col4a3', 'Col4a4', 'Pou2f3', 'Pdgfra', 'Wnt2', 'Tcf21', 'Scube2', 'Wnt5a', 'Pi16', 'Sfrp4','Col1a2', 'Col3a1', 'Col6a1', 'Cthrc1', 'Fn1', "Aspn", 'Wif1', 'Acta2', 'Cspg4', 'Pdgfrb', 'Lgr5', 'Lgr6', 'Tgfbi', 'Wt1', 'Plvap', 'Hey1',  'Car4', 'Itgam', 'Itgax', 'Cd68', 'Cd14', 'Cd86', 'Pparg', 'Spp1', 'Cpa3', 'Prf1', 'Cd3e', 'Cd4', 'Cd8a', 'Il7r', 'Foxp3', 'Ms4a1', 'Jchain', 'Irf7', 'Cd34', 'Ly6a', 'Plp1', 'Cdh4', 'Csmd1', 'Mki67']
In [36]:
sc.tl.dendrogram(adata, groupby='leiden')
sc.pl.dotplot(adata, marker_genes, groupby='leiden', standard_scale='var', cmap='YlGnBu', dendrogram=True)
In [37]:
sc.pl.matrixplot(adata, marker_genes, groupby='leiden', standard_scale='var', cmap='YlGnBu', dendrogram=True)
In [38]:
sc.pl.umap(adata, color=['leiden'], title='', legend_loc='on data')
In [ ]:
#sc.tl.rank_genes_groups(adata, 'leiden', method='wilcoxon')
#sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False)
In [43]:
sc.tl.leiden(adata,restrict_to=('leiden', ['6']), resolution=0.2, key_added='subcluster')
sc.tl.leiden(adata,restrict_to=('subcluster', ['7']), resolution=0.2, key_added='subcluster1')
sc.tl.leiden(adata,restrict_to=('subcluster1', ['3']), resolution=0.2, key_added='subcluster2')

sc.pl.umap(adata, color=['subcluster2'], title='', legend_loc='on data')
In [44]:
sc.tl.dendrogram(adata, 'subcluster2')
sc.pl.matrixplot(adata, marker_genes, groupby='subcluster2', standard_scale='var', cmap='YlGnBu', dendrogram=True)
In [ ]:
 
In [45]:
sc.tl.rank_genes_groups(adata, 'subcluster2', method='wilcoxon')
sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False)
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:394: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:399: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:409: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:420: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:394: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:399: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:409: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:420: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:394: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:399: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:409: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:420: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:394: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:399: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:409: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:420: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:394: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:399: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:409: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:420: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:394: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:399: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:409: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:420: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:394: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:399: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:409: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:420: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:394: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:399: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:409: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:420: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:394: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:399: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:409: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:420: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:394: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:399: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:409: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:420: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:394: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'names'] = self.var_names[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'scores'] = scores[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:399: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals'] = pvals[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:409: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:420: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  self.stats[group_name, 'logfoldchanges'] = np.log2(
In [46]:
#remove low-quality/doublet (19,22)
adata = adata[adata.obs['leiden'].isin(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '20', '21'])]
In [53]:
sc.tl.leiden(adata,restrict_to=('subcluster2', ['6,1']), resolution=0.2, key_added='subcluster3')
In [56]:
sc.tl.dendrogram(adata, 'subcluster3')
sc.pl.matrixplot(adata, marker_genes, groupby='subcluster3', standard_scale='var', cmap='YlGnBu', dendrogram=True)
In [57]:
#remove low-quality/doublet ('7,3', 19,22)
adata = adata[adata.obs['subcluster3'].isin(['0', '1', '2', '3,0', '3,1', '3,2', '4', '5', '6,0', '6,1,0', '6,1,2', '6,1,2', '7,0', '7,1', '7,2', '7,4', '7,5', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '20', '21'])]
In [ ]:
 
In [58]:
tmp = adata.obs['subcluster3'].cat.categories

tmp = ['Intermediate FB' if item == '0' else item for item in tmp]
tmp = ['Alveolar FB' if item == '1' else item for item in tmp]
tmp = ['AT2' if item == '2' else item for item in tmp]
tmp = ['iMO' if item == '3,0' else item for item in tmp]
tmp = ['MDM' if item == '3,1' else item for item in tmp]
tmp = ['AM' if item == '3,2' else item for item in tmp]
tmp = ['Adventitial FB' if item == '4' else item for item in tmp]
tmp = ['MyoFB' if item == '5' else item for item in tmp]
tmp = ['Intermediate alveolar - stressed' if item == '6,0' else item for item in tmp]
tmp = ['AT1 - mature' if item == '6,1,0' else item for item in tmp]
tmp = ['AT1 - immature' if item == '6,1,2' else item for item in tmp]
tmp = ['Intermediate alveolar - cell-cycle arrested' if item == '6,1,1' else item for item in tmp]
tmp = ['moDC' if item == '7,0' else item for item in tmp]
tmp = ['T cells' if item == '7,1' else item for item in tmp]
tmp = ['Treg' if item == '7,2' else item for item in tmp]
tmp = ['NK' if item == '7,4' else item for item in tmp]
tmp = ['Mast' if item == '7,5' else item for item in tmp]
tmp = ['Intermediate alveolar' if item == '8' else item for item in tmp]
tmp = ['Mesothelial' if item == '9' else item for item in tmp]
tmp = ['MCC' if item == '10' else item for item in tmp]
tmp = ['FB - proliferating' if item == '11' else item for item in tmp]
tmp = ['Plasma/pDC' if item == '12' else item for item in tmp]
tmp = ['cDC' if item == '13' else item for item in tmp]
tmp = ['Capillary' if item == '14' else item for item in tmp]
tmp = ['Pericyte' if item == '15' else item for item in tmp]
tmp = ['SMC' if item == '16' else item for item in tmp]
tmp = ['Macrophage - proliferating' if item == '17' else item for item in tmp]
tmp = ['Activated FB' if item == '18' else item for item in tmp]
tmp = ['Secretory' if item == '20' else item for item in tmp]
tmp = ['Glial' if item == '21' else item for item in tmp]



adata.rename_categories('subcluster3', tmp)
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/anndata/_core/anndata.py:1160: FutureWarning: The `inplace` parameter in pandas.Categorical.rename_categories is deprecated and will be removed in a future version. Removing unused categories will always return a new Categorical object.
  self.obs[key].cat.rename_categories(categories, inplace=True)
In [59]:
sc.set_figure_params(figsize=(5,5))
sc.pl.umap(adata, color=['subcluster3'], title='')
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/anndata/compat/_overloaded_dict.py:106: ImplicitModificationWarning: Trying to modify attribute `._uns` of view, initializing view as actual.
  self.data[key] = value
In [ ]:
sc.set_figure_params(figsize=(4,4))
sc.pl.umap(adata, color=['subcluster3'], title='', legend_fontsize='small', save='celltype.png')
In [61]:
pd.crosstab(adata.obs['subcluster3'], adata.obs['group'], normalize='columns')
Out[61]:
group Control Veh Control bleo YT-deleted Veh YT-deleted bleo
subcluster3
Intermediate FB 0.169226 0.169294 0.205804 0.310621
Alveolar FB 0.274109 0.051566 0.247642 0.025654
AT2 0.167230 0.033080 0.255865 0.009314
iMO 0.019042 0.067523 0.018380 0.078105
MDM 0.000614 0.053707 0.001451 0.080556
AM 0.017813 0.042226 0.012817 0.026634
Adventitial FB 0.077703 0.086593 0.044256 0.053105
MyoFB 0.073710 0.048453 0.029988 0.068301
Intermediate alveolar - stressed 0.009214 0.060712 0.010157 0.024183
AT1 - mature 0.013974 0.007200 0.017170 0.005556
AT1 - immature 0.001382 0.002335 0.000000 0.001307
moDC 0.006910 0.014205 0.005320 0.029412
T cells 0.010749 0.025491 0.005804 0.009150
Treg 0.005068 0.009535 0.001935 0.014869
NK 0.000921 0.003308 0.000000 0.000000
Mast 0.000154 0.001751 0.000000 0.001961
Intermediate alveolar 0.015356 0.086398 0.010399 0.034804
Mesothelial 0.026259 0.042421 0.038694 0.025490
MCC 0.035319 0.034442 0.020073 0.015033
FB - proliferating 0.002457 0.022962 0.001451 0.050817
Plasma/pDC 0.002150 0.047869 0.002418 0.024837
cDC 0.004760 0.017513 0.005320 0.042647
Capillary 0.017506 0.006616 0.003386 0.019771
Pericyte 0.013206 0.018097 0.010157 0.006536
SMC 0.010442 0.010508 0.015719 0.006536
Macrophage - proliferating 0.002457 0.018875 0.002418 0.016503
Activated FB 0.004760 0.007589 0.014994 0.011928
Secretory 0.011057 0.003308 0.014752 0.001961
Glial 0.006450 0.006421 0.003628 0.004412
In [62]:
adata.obs['subcluster3'].cat.reorder_categories([
'Activated FB',
'Adventitial FB',
'Alveolar FB',
'AM',
'AT1 - immature',
'AT1 - mature',
'AT2',
'Capillary',
'cDC',
'FB - proliferating',
'Glial',
'iMO',
'Intermediate alveolar',
'Intermediate alveolar - stressed',
'Intermediate FB',
'Macrophage - proliferating',
'Mast',
'MCC',
'MDM',
'Mesothelial',
'moDC',
'MyoFB',
'NK',
'Pericyte',
'Plasma/pDC',
'Secretory',
'SMC',
'T cells',
'Treg'
], inplace=True)
<ipython-input-62-84d32cded238>:1: FutureWarning: The `inplace` parameter in pandas.Categorical.reorder_categories is deprecated and will be removed in a future version. Reordering categories will always return a new Categorical object.
  adata.obs['subcluster3'].cat.reorder_categories([
In [65]:
sc.set_figure_params(figsize=(4,4))
In [68]:
sc.pl.umap(adata, color=['group'], legend_fontsize='small', title='')
sc.pl.umap(adata, color=['subcluster3'], legend_fontsize='x-small', title='', save='celltype.png')
WARNING: saving figure to file figures/umapcelltype.png
In [69]:
results_file='./yap_deleted_20230820.h5ad'
adata.write(results_file)
In [ ]:
 
In [70]:
sc.tl.embedding_density(adata, groupby='group')
In [71]:
sc.pl.embedding_density(adata, groupby='group', save='embedding_density.png')
WARNING: saving figure to file figures/umap_density_group_embedding_density.png
In [72]:
sc.pl.umap(adata, color=['group'], groups='Control Veh', legend_fontsize='small', title='Control PBS', save='control_pbs.png')
sc.pl.umap(adata, color=['group'], groups='YT-deleted Veh', legend_fontsize='small', title='YT PBS', save='yt_pbs.png')
sc.pl.umap(adata, color=['group'], groups='Control bleo', legend_fontsize='small', title='Control Bleomycin', save='control_bleo.png')
sc.pl.umap(adata, color=['group'], groups='YT-deleted bleo', legend_fontsize='small', title='YT Bleomycin', save='yt_bleo.png')
WARNING: saving figure to file figures/umapcontrol_pbs.png
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: In a future version of pandas all arguments of Categorical.replace except for the argument 'value' will be keyword-only.
  values = values.replace(values.categories.difference(groups), np.nan)
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: Categorical.replace is deprecated and will be removed in a future version. Use Series.replace directly instead.
  values = values.replace(values.categories.difference(groups), np.nan)
WARNING: saving figure to file figures/umapyt_pbs.png
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: In a future version of pandas all arguments of Categorical.replace except for the argument 'value' will be keyword-only.
  values = values.replace(values.categories.difference(groups), np.nan)
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: Categorical.replace is deprecated and will be removed in a future version. Use Series.replace directly instead.
  values = values.replace(values.categories.difference(groups), np.nan)
WARNING: saving figure to file figures/umapcontrol_bleo.png
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: In a future version of pandas all arguments of Categorical.replace except for the argument 'value' will be keyword-only.
  values = values.replace(values.categories.difference(groups), np.nan)
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: Categorical.replace is deprecated and will be removed in a future version. Use Series.replace directly instead.
  values = values.replace(values.categories.difference(groups), np.nan)
WARNING: saving figure to file figures/umapyt_bleo.png
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: In a future version of pandas all arguments of Categorical.replace except for the argument 'value' will be keyword-only.
  values = values.replace(values.categories.difference(groups), np.nan)
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1171: FutureWarning: Categorical.replace is deprecated and will be removed in a future version. Use Series.replace directly instead.
  values = values.replace(values.categories.difference(groups), np.nan)
In [ ]:
 
In [73]:
stromal = adata[adata.obs['subcluster3'].isin([
'Alveolar FB', 'Activated FB', 'Adventitial FB', 'FB - proliferating', 'Intermediate FB', 'MyoFB', 'Pericyte', 'SMC'
])]
stromal
Out[73]:
View of AnnData object with n_obs × n_vars = 11829 × 30759
    obs: 'sample', 'species', 'gene_count', 'tscp_count', 'tscp_count_50dup', 'read_count', 'bc1_well', 'bc2_well', 'bc3_well', 'bc1_wind', 'bc2_wind', 'bc3_wind', 'n_genes', 'doublet_score', 'predicted_doublet', 'mread_count', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden', 'genotype', 'treatment', 'group', 'subcluster', 'subcluster1', 'subcluster2', 'subcluster3', 'umap_density_group'
    var: 'gene_id-1', 'genome-1', 'n_cells-1', 'gene_id-252 1/2-2', 'genome-252 1/2-2', 'n_cells-252 1/2-2', 'gene_id-252 3/4-2', 'genome-252 3/4-2', 'n_cells-252 3/4-2', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_colors', 'sample_colors', 'batch_colors', 'group_colors', 'dendrogram_leiden', 'subcluster1_colors', 'dendrogram_subcluster1', 'rank_genes_groups', 'subcluster2_colors', 'dendrogram_subcluster2', 'dendrogram_subcluster3', 'subcluster3_colors', 'umap_density_group_params'
    obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
    varm: 'PCs'
    obsp: 'distances', 'connectivities'
In [74]:
fb_genes = ['Pdgfra', 'Tcf21', 'Lgr5', 'Lgr6', 'Acta2', 'Tgfbi', 'Tagln', 'Pi16', 'Sfrp4', 'Mfap5', 'Lum', 'Col1a1', 'Col1a2', 'Col3a1', 'Fn1', 'Eln', 'Fgfr4', 'Bmp5', 'Axin2', 'Rspo2', 'Wnt2', 'Wnt5a', 'Wnt7b', 'Fgf2', 'Fgf7', 'Fgf10', 'Fgf18', 'Tgfb1', 'Tgfb2', 'Tgfb3', 'Ccn2', 'Cthrc1', 'Fap', 'Loxl2', 'Snai1', 'Snai2', 'Twist1', 'Gdf15', 'Mki67' ]

fb_genes2 = ['Pdgfra', 'Scube2', 'Tcf21', 'Lgr5', 'Lgr6', 'Acta2', 'Tgfbi', 'Tagln', 'Pi16', 'Ccl11', 'Adh7', 'Sfrp4', 'Mfap5', 'Lum', 'Cthrc1', 'Col1a1', 'Col1a2', 'Col3a1', 'Fn1', 'Eln', 'Rspo2', 'Wnt2', 'Wnt5a', 'Tgfb1', 'Tgfb2', 'Mki67']
In [75]:
fb_genes2 = ['Pdgfra', 'Scube2', 'Tcf21', 'Wnt2','Pi16', 'Sfrp4', 'Mfap5', 'Lum', 'Cthrc1', 'Fap', 'Col1a1', 'Col1a2', 'Col3a1', 'Fn1',  'Wnt5a', 'Aspn', 'Tgfb1', 'Tgfb2', 'Mki67',  'Lgr6', 'Acta2', "Cspg4", 'Tgfbi', 'Myl9']
fb_genes3 = ['Col1a1', 'Col3a1', 'Pdgfra', 'Scube2', 'Wnt2', 'Cthrc1', 'Fn1', 'Limch1', 'Mfap5','Pi16', 'Aspn','Lum', 'Wnt5a', 'Mki67', 'Top2a', 'Mki67', 'Pdgfrb',"Cspg4", 'Kcnq5',  'Lgr6', 'Acta2',  'Myl9']
fb_genes4 = ['Col1a1', 'Fn1', 'Scube2', 'Cthrc1',  'Pdgfra', 'Gpc6', 'Hhip', 'Mki67', "Cspg4", 'Acta2']
In [76]:
sc.tl.dendrogram(stromal, 'subcluster3')
sc.pl.dotplot(stromal, fb_genes4, groupby='subcluster3', cmap='YlGnBu', save='fb_marker_dotplot.png')
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/anndata/compat/_overloaded_dict.py:106: ImplicitModificationWarning: Trying to modify attribute `._uns` of view, initializing view as actual.
  self.data[key] = value
WARNING: saving figure to file figures/dotplot_fb_marker_dotplot.png
In [88]:
act_fb = adata[adata.obs['subcluster3'].isin([
'Activated FB', 'Proliferating FB', 'Intermediate FB'
])]
act_fb
Out[88]:
View of AnnData object with n_obs × n_vars = 4929 × 30759
    obs: 'sample', 'species', 'gene_count', 'tscp_count', 'tscp_count_50dup', 'read_count', 'bc1_well', 'bc2_well', 'bc3_well', 'bc1_wind', 'bc2_wind', 'bc3_wind', 'n_genes', 'doublet_score', 'predicted_doublet', 'mread_count', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden', 'genotype', 'treatment', 'group', 'subcluster', 'subcluster1', 'subcluster2', 'subcluster3', 'umap_density_group'
    var: 'gene_id-1', 'genome-1', 'n_cells-1', 'gene_id-252 1/2-2', 'genome-252 1/2-2', 'n_cells-252 1/2-2', 'gene_id-252 3/4-2', 'genome-252 3/4-2', 'n_cells-252 3/4-2', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_colors', 'sample_colors', 'batch_colors', 'group_colors', 'dendrogram_leiden', 'subcluster1_colors', 'dendrogram_subcluster1', 'rank_genes_groups', 'subcluster2_colors', 'dendrogram_subcluster2', 'dendrogram_subcluster3', 'subcluster3_colors', 'umap_density_group_params'
    obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
    varm: 'PCs'
    obsp: 'distances', 'connectivities'
In [89]:
act_fb_genes = ['Pdgfra', 'Tcf21', 'Scube2', 'Cthrc1', 'Acta2', 'Tgfbi', 'Tagln','Col1a1', 'Col1a2', 'Col3a1', 'Fn1', 'Eln', 'Fgfr4', 'Bmp5', 'Axin2', 'Rspo2', 'Wnt2', 'Wnt5a', 'Wnt7b']

sc.pl.matrixplot(act_fb, act_fb_genes, groupby='group', cmap='YlGnBu', standard_scale='var', save='act_fb_genes.png')
WARNING: saving figure to file figures/matrixplot_act_fb_genes.png
In [90]:
sc.pl.stacked_violin(act_fb, fb_genes2, groupby='group', jitter=True, swap_axes=True)
In [91]:
act_fb2 = act_fb[act_fb.obs['group'].isin([
'Control bleo', 'YT-deleted bleo'
])]
act_fb2
Out[91]:
View of AnnData object with n_obs × n_vars = 2883 × 30759
    obs: 'sample', 'species', 'gene_count', 'tscp_count', 'tscp_count_50dup', 'read_count', 'bc1_well', 'bc2_well', 'bc3_well', 'bc1_wind', 'bc2_wind', 'bc3_wind', 'n_genes', 'doublet_score', 'predicted_doublet', 'mread_count', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden', 'genotype', 'treatment', 'group', 'subcluster', 'subcluster1', 'subcluster2', 'subcluster3', 'umap_density_group'
    var: 'gene_id-1', 'genome-1', 'n_cells-1', 'gene_id-252 1/2-2', 'genome-252 1/2-2', 'n_cells-252 1/2-2', 'gene_id-252 3/4-2', 'genome-252 3/4-2', 'n_cells-252 3/4-2', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_colors', 'sample_colors', 'batch_colors', 'group_colors', 'dendrogram_leiden', 'subcluster1_colors', 'dendrogram_subcluster1', 'rank_genes_groups', 'subcluster2_colors', 'dendrogram_subcluster2', 'dendrogram_subcluster3', 'subcluster3_colors', 'umap_density_group_params'
    obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
    varm: 'PCs'
    obsp: 'distances', 'connectivities'
In [95]:
sc.pl.stacked_violin(act_fb2, ['Col1a1', 'Col3a1', 'Col5a3', 'Col6a3', 'Loxl2', 'Tns1', 'Fn1'], groupby='group', jitter=True, swap_axes=True, save='act_fb_genes.png')
WARNING: saving figure to file figures/stacked_violin_act_fb_genes.png
In [ ]:
sc.pl.dotplot(act_fb, ['Col1a1', 'Col3a1', 'Col5a3', 'Col6a3', 'Loxl2', 'Tns1', 'Fn1'], groupby='group', standard_scale='var', cmap="YlGnBu", save='act_fb_genes_all.png')
In [ ]:
sc.pl.dotplot(act_fb2, ['Col1a1', 'Col3a1', 'Fn1'], groupby='group', swap_axes=True, save='act_fb_genes.png', cmap='YlGnBu')
In [93]:
sc.pl.dotplot(act_fb2, ['Ccl2', 'Mki67'], groupby='group', swap_axes=True, cmap='YlGnBu')
In [98]:
at2 = adata[adata.obs['subcluster3'].isin([
'AT2'
])]
at2
Out[98]:
View of AnnData object with n_obs × n_vars = 2374 × 30759
    obs: 'sample', 'species', 'gene_count', 'tscp_count', 'tscp_count_50dup', 'read_count', 'bc1_well', 'bc2_well', 'bc3_well', 'bc1_wind', 'bc2_wind', 'bc3_wind', 'n_genes', 'doublet_score', 'predicted_doublet', 'mread_count', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden', 'genotype', 'treatment', 'group', 'subcluster', 'subcluster1', 'subcluster2', 'subcluster3', 'umap_density_group'
    var: 'gene_id-1', 'genome-1', 'n_cells-1', 'gene_id-252 1/2-2', 'genome-252 1/2-2', 'n_cells-252 1/2-2', 'gene_id-252 3/4-2', 'genome-252 3/4-2', 'n_cells-252 3/4-2', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_colors', 'sample_colors', 'batch_colors', 'group_colors', 'dendrogram_leiden', 'subcluster1_colors', 'dendrogram_subcluster1', 'rank_genes_groups', 'subcluster2_colors', 'dendrogram_subcluster2', 'dendrogram_subcluster3', 'subcluster3_colors', 'umap_density_group_params'
    obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
    varm: 'PCs'
    obsp: 'distances', 'connectivities'
In [100]:
sc.pl.dotplot(at2, ['Nkx2-1', 'Scgb1a1', 'Sftpb', 'Sftpc', 'Sftpd', 'Lamp3', 'Abca3', 'Slc34a2', 'Lrrk2', 'Ager'], groupby='group', cmap='YlGnBu', standard_scale='var',  save='at2_genes_lin.png')
sc.pl.stacked_violin(at2, ['Nkx2-1', 'Scgb1a1', 'Sftpb', 'Sftpc', 'Sftpd', 'Lamp3', 'Abca3', 'Slc34a2', 'Lrrk2', 'Ager'], groupby='group', cmap='YlGnBu', standard_scale='var', swap_axes=True,  save='at2_genes_lin_violin.png')
WARNING: saving figure to file figures/dotplot_at2_genes_lin.png
WARNING: saving figure to file figures/stacked_violin_at2_genes_lin_violin.png
In [ ]:
sc.pl.matrixplot(at2, ['Tgfb1', 'Tgfb2', 'Itgav', 'Runx2', 'Serpine1', 'Gdf15', 'Ccn1', 'Ccn2', 'Wwtr1', 'Yap1', 'Tead4', 'Klf5', 'Nfib'], groupby='group', cmap='YlGnBu', standard_scale='var', save='tran_genes_at2.png')
sc.pl.stacked_violin(at2, ['Tgfb1', 'Tgfb2', 'Itgav', 'Runx2', 'Serpine1', 'Gdf15', 'Ccn1', 'Ccn2', 'Wwtr1', 'Yap1', 'Tead4', 'Klf5', 'Nfib'], groupby='group', cmap='YlGnBu', standard_scale='var', swap_axes=True,  save='trans_genes_at2.png')
In [ ]:
sc.pl.dotplot(at2, ['Tgfb1', 'Tgfb2', 'Itgav', 'Gdf15', 'Ccn1', 'Ccn2', 'Wwtr1', 'Yap1', 'Tead4', 'Klf5', 'Nfib'], groupby='group', cmap='YlGnBu', standard_scale='var', save='at2_genes_at2.png')
In [ ]:
sc.pl.dotplot(at2, ['Tgfb1', 'Tgfb2', 'Itgav', 'Serpine1', 'Cdkn1a', 'Gdf15', 'Ccn1', 'Ccn2', 'Wwtr1', 'Yap1', 'Tead4', 'Klf5', 'Nfib'], groupby='group', cmap='YlGnBu', standard_scale='var', save='intermed_genes_at2.png')
sc.pl.dotplot(intermed, ['Tgfb1', 'Tgfb2', 'Itgav', 'Serpine1', 'Cdkn1a', 'Gdf15', 'Ccn1', 'Ccn2', 'Wwtr1', 'Yap1', 'Tead4', 'Klf5', 'Nfib'], groupby='group', cmap='YlGnBu', standard_scale='var', save='intermed_genes_intermed.png')
In [ ]:
sc.pl.matrixplot(at1, ['Tgfb1', 'Tgfb2', 'Itgav', 'Gdf15', 'Ccn1', 'Ccn2', 'Wwtr1', 'Yap1', 'Tead4', 'Klf5', 'Nfib'], groupby='group', cmap='YlGnBu', standard_scale='var', save='at2_genes_at1.png')
In [103]:
at1 = adata[adata.obs['subcluster3'].isin(['AT1 - mature', 'AT1 - immature'
])]
at1
Out[103]:
View of AnnData object with n_obs × n_vars = 262 × 30759
    obs: 'sample', 'species', 'gene_count', 'tscp_count', 'tscp_count_50dup', 'read_count', 'bc1_well', 'bc2_well', 'bc3_well', 'bc1_wind', 'bc2_wind', 'bc3_wind', 'n_genes', 'doublet_score', 'predicted_doublet', 'mread_count', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden', 'genotype', 'treatment', 'group', 'subcluster', 'subcluster1', 'subcluster2', 'subcluster3', 'umap_density_group'
    var: 'gene_id-1', 'genome-1', 'n_cells-1', 'gene_id-252 1/2-2', 'genome-252 1/2-2', 'n_cells-252 1/2-2', 'gene_id-252 3/4-2', 'genome-252 3/4-2', 'n_cells-252 3/4-2', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_colors', 'sample_colors', 'batch_colors', 'group_colors', 'dendrogram_leiden', 'subcluster1_colors', 'dendrogram_subcluster1', 'rank_genes_groups', 'subcluster2_colors', 'dendrogram_subcluster2', 'dendrogram_subcluster3', 'subcluster3_colors', 'umap_density_group_params'
    obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
    varm: 'PCs'
    obsp: 'distances', 'connectivities'
In [104]:
sc.pl.dotplot(at1, ['Nkx2-1', 'Sox2', 'Scgb1a1', 'Scgb3a2', 'Muc5b', 'Sftpd', 'Lamp3', 'Abca3', 'Slc34a2', 'Lrrk2', 'Ager', 'Hopx', 'Rtkn2', 'Wnt3a', ], groupby='group', cmap='YlGnBu', standard_scale='var', save='at2_genes_lin.png')
WARNING: saving figure to file figures/dotplot_at2_genes_lin.png
In [ ]:
epi = adata[adata.obs['subcluster3'].isin(['AT1','Intermediate alveolar',
'Intermediate alveolar - stressed',
'Intermediate alveolar - cell-cycle arrested', 'AT2', 'MCC', 'Secretory'
])]
epi
In [105]:
intermed = adata[adata.obs['subcluster3'].isin(['Intermediate alveolar',
'Intermediate alveolar - stressed',
'AT1 - immature'
])]
intermed
Out[105]:
View of AnnData object with n_obs × n_vars = 1391 × 30759
    obs: 'sample', 'species', 'gene_count', 'tscp_count', 'tscp_count_50dup', 'read_count', 'bc1_well', 'bc2_well', 'bc3_well', 'bc1_wind', 'bc2_wind', 'bc3_wind', 'n_genes', 'doublet_score', 'predicted_doublet', 'mread_count', 'batch', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'leiden', 'genotype', 'treatment', 'group', 'subcluster', 'subcluster1', 'subcluster2', 'subcluster3', 'umap_density_group'
    var: 'gene_id-1', 'genome-1', 'n_cells-1', 'gene_id-252 1/2-2', 'genome-252 1/2-2', 'n_cells-252 1/2-2', 'gene_id-252 3/4-2', 'genome-252 3/4-2', 'n_cells-252 3/4-2', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_colors', 'sample_colors', 'batch_colors', 'group_colors', 'dendrogram_leiden', 'subcluster1_colors', 'dendrogram_subcluster1', 'rank_genes_groups', 'subcluster2_colors', 'dendrogram_subcluster2', 'dendrogram_subcluster3', 'subcluster3_colors', 'umap_density_group_params'
    obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
    varm: 'PCs'
    obsp: 'distances', 'connectivities'
In [ ]:
sc.pl.matrixplot(intermed, ['Tgfb1', 'Tgfb2', 'Itgav', 'Gdf15', 'Ccn1', 'Ccn2', 'Wwtr1', 'Yap1', 'Tead4', 'Klf5', 'Nfib'], groupby='group', cmap='YlGnBu', standard_scale='var', save='at2_genes_trans.png')
In [106]:
sc.pp.log1p(intermed)
sc.tl.rank_genes_groups(intermed, 'group', method='wilcoxon')
sc.pl.rank_genes_groups(intermed, n_genes=25, sharey=False)
WARNING: adata.X seems to be already log-transformed.
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/preprocessing/_simple.py:373: UserWarning: Received a view of an AnnData. Making a copy.
  view_to_actual(adata)
/opt/miniconda3/envs/scanpy1.9/lib/python3.9/site-packages/scanpy/preprocessing/_simple.py:352: RuntimeWarning: invalid value encountered in log1p
  np.log1p(X, out=X)
In [107]:
sc.pl.dotplot(intermed, ['Nkx2-1','Sox2', 'Sox9', 'Cebpa', 'Meg3', 'Trp63', 'Krt8', 'Etv5', 'Scgb1a1', 'Scgb3a2', 'Muc5b', 'Abca3', 'Lamp3', 'Sftpb', 'Sftpc', 'Sftpd', 'Ager', 'Hopx', 'Rtkn2', 'Wnt3a', 'Tgfb1', 'Tgfb2', 'Itgav', 'Gdf15', 'Ccn1', 'Ccn2', 'Wwtr1', 'Yap1', 'Tead4', 'Klf5', 'Nfib'], groupby='group', cmap='YlGnBu', standard_scale='var')
In [ ]:
sc.pl.dotplot(at1, ['Nkx2-1','Sox2', 'Sox9', 'Cebpa', 'Meg3', 'Trp63', 'Krt8', 'Etv5', 'Scgb1a1', 'Scgb3a2', 'Muc5b', 'Abca3', 'Lamp3', 'Sftpb', 'Sftpc', 'Sftpd', 'Ager', 'Hopx', 'Rtkn2', 'Wnt3a', 'Tgfb1', 'Tgfb2', 'Itgav', 'Gdf15', 'Ccn1', 'Ccn2', 'Wwtr1', 'Yap1', 'Tead4', 'Klf5', 'Nfib'], groupby='group', cmap='YlGnBu', standard_scale='var')
In [111]:
sc.pl.dotplot(intermed, ['Tgfb1', 'Tgfb2', 'Itgav', 'Serpine1', 'Gdf15', 'Ccn1', 'Ccn2', 'Wwtr1', 'Yap1', 'Tead4', 'Klf5', 'Nfib'], groupby='group', cmap='YlGnBu', standard_scale='var',  save='aec_genes_trans.png')
WARNING: saving figure to file figures/dotplot_aec_genes_trans.png
In [115]:
sc.pl.dotplot(intermed, ['Ccl2'], groupby='group', cmap='YlGnBu', standard_scale='var', save='intermed_ccl2.png')
sc.pl.dotplot(act_fb, ['Ccl2'], groupby='group', cmap='YlGnBu', standard_scale='var', save='act_fb_ccl2.png')
WARNING: saving figure to file figures/dotplot_intermed_ccl2.png
WARNING: saving figure to file figures/dotplot_act_fb_ccl2.png
In [119]:
sc.settings.set_figure_params(dpi=100, fontsize=14, dpi_save=300, figsize=(4,4), format='png')
markers = ['Col1a1', 'Cthrc1', 'Pdgfra', 'Plp1', 'Pi16', 'Wt1', 'Aplnr', 'Mki67', 'Cspg4', 'Aspn', 'Acta2', 'Itgam', 'Cd14', 'Cd68', 'Spp1', 'Pparg', 'Jchain', 'Cd86', 'Cpa3', 'Foxp3', 'Cd3e','Nkg7', 'Foxj1', 'Rtkn2', 'Krt8', 'Cdkn1a', 'Abca3', 'Scgb1a1']
sc.pl.matrixplot(adata, markers, groupby='subcluster3', cmap='YlGnBu', standard_scale='var', dendrogram=True, save='markers.png')
WARNING: saving figure to file figures/matrixplot_markers.png
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]: